## Create Militarization dataset --------
library(tidyverse)
library(readxl)
library(openintro)
library(lubridate)
library(reshape2)
source("lib/fix-names.R")
options(scipen=0, digits=7) #to standardize number of digits when exporting data. all data here is run using these options.

#----------------------------------------------------------------#
# Military Aid Data --------------
#----------------------------------------------------------------#
path <- "raw-Data/DISP_AllStatesAndTerritories_03312018.xlsx"
LESOlist <- path %>% 
  excel_sheets() %>% 
  set_names() %>% 
  map(read_excel, path = path)

LESO <- do.call(rbind,LESOlist) %>% 
  rename(agency_name=`Station Name (LEA)`) %>%
  mutate(state_abbrv = State,
         State=abbr2state(State),
         `Ship Date` =  ymd_hms(`Ship Date`),
         year = year(`Ship Date`)) %>%
  filter(!is.na(State)) %>% # remove all missing States b/c territories: 155,934 obs  
  identify_nonlocal(.)

controlled_subset <- FALSE

if(controlled_subset){
  controlled <- read.csv("data/controlled-items-by-NSN.csv")
  LESO <- left_join(LESO, controlled, by="NSN")
  table(LESO$controlled)
  LESO <- filter(LESO, controlled==1)
}

# Read in fips data and match by agency name -------------
fips <- read_csv("raw-data/clean-countiesandfips.csv") %>%
  select(agency,agency_name,State,city,County)

#militarization
mdata <- left_join(LESO,fips,
                   by = c("State"="State",
                          "agency_name"="agency_name")) %>%
  fix_county_names(.)

# EC "NAVAJO POLICE DEPT, TUBA CTY DIST" is in military data but not counties
# they had 3 trucks
mdata$city[mdata$agency_name=="NAVAJO POLICE DEPT, TUBA CTY DIST"] <- "Tuba City"
mdata$County[mdata$agency_name=="NAVAJO POLICE DEPT, TUBA CTY DIST"] <- "Coconino County"

mdata$city[mdata$agency_name=="WI DEPT OF JUSTICE, DCI (LEA)"] <- "Madison"
mdata$County[mdata$agency_name=="WI DEPT OF JUSTICE, DCI (LEA)"] <- "Dane County"

##next, need to find the federal supply category based on the NSN listed in the data
#the NSN is comprised of three components: an FSC, country of origin, and two unique numbers
#each NSN is comprised of [0-10][0-10][0-10][0-10], the first two digits of which are the groups
#next two identify type, but for our purposes we only need the groups
#(i.e. group 10 is weapons, but 1005 is guns through 30 mm)
##and, to create an accurate indicator for the Harris et al. tactical items, we must unite
#the federal supply code and itemtype back into one FSC, to ensure we have accurate measure
#of all tactical items
#now, for Harris, we need all tactical items, which are defined as three broad categories
#1 - weapons (guns and grenade launchers)
#2 - optics (day sights and night optics)
#3 - vehicles (military trucks, aircraft, mine resistant vehicles, armored personnel carriers)
#then, need an indicator for each respective grouping:
#weapons - group 10 is Weapons (guns, launchers, miscellaneous weapons)
#optics - FSC code 1240 refers to optical sighting and ranging equipment
#vehicles - group 15 is aricraft and airframe structural components
#group 23 is ground effect vehicles, motor vehicles, trailers, cycles (passenger cars, trucks,
#trailers, combat vehicles)
#Harris also break down vehicles into utility vehicles vs. combat vehicles
#based on the descriptions of items in group 23, moto vehicles, trucks, trailers, etc.
#are classified as utility vehicles, whereas 2350 refers specifically to combat vehicles
mdata <- mdata %>%
  separate(NSN,c("FSC","country","uniqueno","uniqueno2"))  %>%
  separate(FSC,into=c("federalsupplycode","itemtype"),sep = 2)%>%
  mutate(FSC = do.call(paste, c(.[c("federalsupplycode","itemtype")], sep="")),
         BGsupplycategory = ifelse(federalsupplycode==10 | federalsupplycode==13 | ##create indicators for weapons, vehicles, gears, as in Bove and Gavrilova
                                      federalsupplycode==14,"BGWeapons",ifelse(
                                        federalsupplycode==12 | federalsupplycode==42 |
                                          federalsupplycode==58 | federalsupplycode==59 |
                                          federalsupplycode==67 | federalsupplycode==69 |
                                          federalsupplycode==84 | federalsupplycode==78 ,"BGGears",ifelse(
                                            federalsupplycode==15 | federalsupplycode==16 |
                                              federalsupplycode==17 | federalsupplycode==19 |
                                              federalsupplycode==20 | federalsupplycode==23 |
                                              federalsupplycode==24 | federalsupplycode==25 |
                                              federalsupplycode==26 | federalsupplycode==28 |
                                              federalsupplycode==29 | federalsupplycode==30,
                                            "BGVehicles","BGOthers"))),
         HARRISsupplycategory = ifelse(federalsupplycode==10,"HWeapons",
                                        ifelse(FSC==1240,"HOptics",
                                               ifelse(FSC==2305 | FSC==2310 |
                                                        FSC==2320 |
                                                        FSC==2330 | FSC==2340,
                                                      "HUtilityVehicles",
                                                      ifelse(FSC==2350 | federalsupplycode==15,
                                                             "HCombatVehicles","HNonTactical"))))) 

# Total Agency level ------- 
###creating variables of interest: sum of awards given, sum of awards value given to each agency
mdata_agg <- mdata %>%
  group_by(agency_name,city,County,State,year,BGsupplycategory) %>%
  summarise(sumawards=sum(`Acquisition Value`*Quantity,na.rm=T),sumquantity=sum(Quantity,na.rm=T)) %>%
  na.omit(.) 

#this creates a long dataframe so we can do the next piece of code
militarizationaggmelt <- melt(mdata_agg,measure.vars=c("sumawards","sumquantity"))

#this gets sum of awards given, sum of awards value by each type of category (weapons, gears, etc.)
sumawardsjurisdictionBG <- dcast(militarizationaggmelt, agency_name+year+State+city+County ~ 
                                   variable+BGsupplycategory,
                                 value.var="value")%>%
  mutate(sumallLESOaidBG = rowSums(.[,c("sumawards_BGGears","sumawards_BGOthers",
                                        "sumawards_BGVehicles","sumawards_BGWeapons")], 
                                   na.rm=TRUE), #sum all LESO aid (money)
         sumallLESOquantityBG  = rowSums(.[,c("sumquantity_BGGears","sumquantity_BGOthers",
                                               "sumquantity_BGVehicles","sumquantity_BGWeapons")], 
                                          na.rm=TRUE) )  

######################do for Harris
mdata_aggHARRIS <- mdata %>%
  group_by(agency_name,city,County,State,year,HARRISsupplycategory) %>%
  summarise(sumawards=sum(`Acquisition Value`*Quantity,na.rm = T),
            sumquantity=sum(Quantity),na.rm=T) %>%
  na.omit(.) 

#this creates a long dataframe so we can do the next piece of code
militarizationaggmeltHARRIS <- melt(mdata_aggHARRIS,measure.vars=c("sumawards","sumquantity"))

#this gets sum of awards given, sum of awards value by each type of category (weapons, gears, etc.)
sumawardsjurisdictionHARRIS <- dcast(militarizationaggmeltHARRIS, agency_name+year+State+city+County ~ 
                                       variable+HARRISsupplycategory,
                                     value.var="value") %>%
  mutate(sumallLESOaidHARRIS=rowSums(.[,c("sumawards_HCombatVehicles",
                                          "sumawards_HOptics","sumawards_HUtilityVehicles",
                                          "sumawards_HWeapons","sumawards_HNonTactical")], 
                                         na.rm=TRUE), #sum all LESO aid (money)
         sumallLESOquantityHARRIS= rowSums(.[,c("sumquantity_HCombatVehicles",
                                                "sumquantity_HOptics","sumquantity_HUtilityVehicles",
                                                "sumquantity_HWeapons","sumquantity_HNonTactical")], na.rm=TRUE))

#join BG and Harris together
sumawardsjurisdiction <- inner_join(sumawardsjurisdictionBG,
                                    sumawardsjurisdictionHARRIS) %>%
  replace(., is.na(.), 0) %>%
  mutate(iscounty = as.integer(str_detect(agency_name, ###now, need to find the jurisdictions that are likely counties vs. those that are cities
                                           paste(c("COUNTY", "CTY"),collapse = '|')))) 

# join with indicator for local
local <- mdata %>% 
  select(agency_name,local) %>% 
  unique() 

sumawardsjurisdiction <- left_join(sumawardsjurisdiction,local,
                                   by=c("agency_name"="agency_name")) %>%
  select(agency_name,year,city,County,State,everything()) %>%
  arrange(State,city,year)

# Make balanced panel -----------
# Make dataset of every agency for every year
places <- sumawardsjurisdiction %>% 
  select(agency_name,city,County,State,iscounty,local) %>% 
  distinct()
years <- 1990:2018
complete <- do.call("bind_rows", replicate(length(years), places, simplify = FALSE))
complete$year <- sort(rep(years,nrow(places)))

# merge back in awards infor
complete <- left_join(complete,sumawardsjurisdiction) %>% 
  replace(., is.na(.), 0) 

# Save data at agency level ---------
if(controlled_subset){
  save(complete,file="data/sum_LESO_agency_controlled.RData")
  write.csv(complete,"data/sum_LESO_agency_controlled.csv",row.names=F,na="")
} else {
  save(complete,file="data/sum_LESO_agency.RData")
  write.csv(complete,"data/sum_LESO_agency.csv",row.names=F,na="")  
}


######################################################################################################
######################################################################################################
# AGGREGATE TO COUNTY LEVEL ---------------
######################################################################################################
######################################################################################################
###the above code is getting the summary data by agency name, but now need to aggregate to the county
##so we can compare the results of city to county
sumawardscounty <- complete %>%
  group_by(County,year,State)%>%
  summarise_at(vars(contains("sum")), sum, na.rm=TRUE)

county <- read_csv("raw-data/FIPS-County-Codes.txt",col_names = F) %>%
  rename(State_abbrv=X1,state.fips=X2, county.fips=X3,County=X4,type=X5)%>%
  mutate(fips = paste0(state.fips,county.fips),
         State = abbr2state(State_abbrv)) %>%
  select(-type) 

sumawardscounty <- left_join(sumawardscounty, county, 
                             by=c("State"="State","County")) %>%
  select(County,State,State_abbrv,year,fips,everything()) %>%
  arrange(State,County,year) # 70412 observations, 2428 for each year  

# Save data at county level ---------
if(controlled_subset){
  save(sumawardscounty,file="data/sum_LESO_county_controlled.RData")
  write.csv(sumawardscounty,"data/sum_LESO_county_controlled.csv",row.names=F,na="")  
} else {
  save(sumawardscounty,file="data/sum_LESO_county.RData")
  write.csv(sumawardscounty,"data/sum_LESO_county.csv",row.names=F,na="")
}
 

